home *** CD-ROM | disk | FTP | other *** search
- /*
- File: 604Profile.c
-
- Contains: Routines to time small test routines at the instruction dispatch level.
-
- Written by: Mike Neil
-
- Copyright: This code is PUBLIC DOMAIN as of Friday June 19, 1998
-
- Note: The code you want to modify is at TestCode()
-
- main() is set up for G3 processors. The constant "11" in the line:
- testState.time.pmc1 = testState.time.pmc1 - 11;
- has to be modified slightly for other processors, so that the first
- instruction is cycle 1 at instruction 1.
-
- The cycle counter is not available on 601 or 603 series processors, so this
- code doesn't work on those machines.
-
- */
-
- #include <stdio.h>
- #include <stdlib.h>
- #include <time.h>
- #include <string.h>
- #include <Types.h>
- #include "Disassembler.h"
-
-
- // Option flags that main() hands to ppcDisassembler(); the flags that are commented out below stay disabled.
- enum {
- kStandardDisAsmOptions =
- // Disassemble_Power |
- Disassemble_PowerPC32 |
- // Disassemble_PowerPC64 |
- // Disassemble_PowerPC601 |
- Disassemble_RsvBitsErr |
- Disassemble_FieldErr |
- Disassemble_Extended |
- // Disassemble_DecSI |
- // Disassemble_DecUI |
- Disassemble_DecField |
- // Disassemble_DecOffset |
- // Disassemble_DecPCRel |
- // Disassemble_Hex2sComp |
- // Disassemble_MinHex |
- // Disassemble_CRBits |
- // Disassemble_CRFltBits |
- Disassemble_BranchBO |
- Disassemble_TrapTO
- };
-
-
- // Structs for register state
- struct GeneralPurposeRegisters { /* One 32-bit slot per general purpose register, in register
-    order (32 slots, 128 bytes). TestHarness moves whole images with stmw/lmw and addresses
-    single slots by byte offset, so the member order and sizes must not change. */
- UInt32 R0;
- UInt32 R1; // slot 1 (offset 4): the harness never loads this slot ("Don't load the Stack!")
- UInt32 SP; // slot 2 (offset 8): the harness loads this slot into r2, despite the field name
- UInt32 R3;
- UInt32 R4;
- UInt32 R5;
- UInt32 R6;
- UInt32 R7;
- UInt32 R8;
- UInt32 R9;
- UInt32 reserved1; // slots 10-12: in the test image the harness overwrites these with r10-r12
- UInt32 reserved2; // (state pointer, MMCR0 on value, MMCR0 off value) so that its lmw
- UInt32 reserved3; // reloads the harness's own values into those registers
- UInt32 R13;
- UInt32 R14;
- UInt32 R15;
- UInt32 R16;
- UInt32 R17;
- UInt32 R18;
- UInt32 R19;
- UInt32 R20;
- UInt32 R21;
- UInt32 R22;
- UInt32 R23;
- UInt32 R24;
- UInt32 R25;
- UInt32 R26;
- UInt32 R27;
- UInt32 R28;
- UInt32 R29;
- UInt32 R30;
- UInt32 R31;
- };
- typedef struct GeneralPurposeRegisters GeneralPurposeRegisters;
-
-
- struct FloatingPointRegisters { /* One double per floating point register, in register order
-    (32 x 8 = 256 bytes). TestHarness walks the image with lfdu/stfdu in steps of 8 bytes,
-    so the member order and sizes must not change. */
- double FPR0;
- double FPR1;
- double FPR2;
- double FPR3;
- double FPR4;
- double FPR5;
- double FPR6;
- double FPR7;
- double FPR8;
- double FPR9;
- double FPR10;
- double FPR11;
- double FPR12;
- double FPR13;
- double FPR14;
- double FPR15;
- double FPR16;
- double FPR17;
- double FPR18;
- double FPR19;
- double FPR20;
- double FPR21;
- double FPR22;
- double FPR23;
- double FPR24;
- double FPR25;
- double FPR26;
- double FPR27;
- double FPR28;
- double FPR29;
- double FPR30;
- double FPR31;
- };
- typedef struct FloatingPointRegisters FloatingPointRegisters;
-
-
- struct MachineState { // Special purpose registers the harness saves and loads (4 x 4 = 16 bytes)
- UInt32 LR; // offset 0: link register (mflr / mtlr)
- UInt32 CTR; // offset 4: count register (mfctr / mtctr)
- UInt32 CR; // offset 8: condition register (mfcr / mtcrf 0xFF)
- UInt32 XER; // offset 12: XER (mfspr / mtspr 1)
- };
- typedef struct MachineState MachineState;
-
-
- /*
- The MMCR0 bit field definitions
-
-                 E
-                 N
-       D     D D I R
-       I D D M M N T
-       S P U S R T C      Thresh       PMC1Sel PMC2Sel
-       0 0 0 0 0 0 0 00 0 111111 1 1 1 1222222 222233
-       0 1 2 3 4 5 6 78 9 012345 6 7 8 9012345 678901
-
- mmcr0 0 1 0 0 0 0 1 00 0 000001 0 0 0 0000001 000100
-
- (The two digit rows are the tens and units of the bit numbers, bit 0 being the
- most significant. The mmcr0 row is 0x42010044, the value main() stores in
- mmcr0_On; mmcr0_Off, 0xC2010044, differs only in bit 0, the DIS column.)
- */
-
- struct TimingInfo604 {
- UInt32 mmcr0_On; // offset 0: value the harness writes to SPR 952 to start the counters
- UInt32 mmcr0_Off; // offset 4: value the harness writes to SPR 952 to stop the counters
- SInt32 pmc1; // offset 8: SPR 953 (PMC1) read back after the test; main() prints it as "Cycles"
- SInt32 pmc2; // offset 12: SPR 954 (PMC2) read back after the test; main() prints it as "Count"
- };
- typedef struct TimingInfo604 TimingInfo604;
-
-
- // General test data structure, shared by asm code. TestHarness addresses every member by the hard-coded byte offset noted below, so member order and sizes must not change.
- struct TimingState
- {
- TimingInfo604 time; // 16 bytes 0 offset - counter control values in, counter results out
- MachineState machine; // 16 bytes 16 - LR/CTR/CR/XER loaded before the test
- GeneralPurposeRegisters gpr; // 128 bytes 32 - GPR values loaded before the test
- FloatingPointRegisters fpr; // 256 bytes 160 - FPR values loaded before the test
- MachineState save_machine; // 16 bytes 416 - caller's LR/CTR/CR/XER, restored after the test
- GeneralPurposeRegisters save_gpr; // 128 bytes 432 - caller's GPRs, restored after the test
- FloatingPointRegisters save_fpr; // 256 bytes 560 - caller's FPRs, restored after the test
- MachineState end_machine; // 16 bytes 816 - LR/CTR/CR/XER captured after the test
- GeneralPurposeRegisters end_gpr; // 128 bytes 832 - GPRs captured after the test
- };
- typedef struct TimingState TimingState;
-
-
- // Size of TestHarness prolog and epilog, in bytes; main() uses these to split TestHarness around the test code
- enum
- {
- kDUMP_END_STATE = 0, // nonzero makes main() print the end-of-test LR/CTR/CR/XER and GPR values
- kHarnessPrologSize = (480L), // bytes copied ahead of the test code: the 120 instructions listed before @beginInst
- kHarnessEpilogSize = ((34L * 4L) + 160L), // bytes copied after the test code, read from TestHarness + kHarnessPrologSize. NOTE(review): the epilog as listed is 69 instructions (276 bytes), less than this 296 - confirm against the assembled size
- };
-
-
- // Local prototypes
- void TestHarness(void); // asm template: prolog + epilog that main() copies around the test code
- void TestCode(void); // asm: the instructions being timed; copied by main(), never called directly
-
- void ExecuteTest(TimingState *r3, UInt32 *r4); // asm glue: jumps to the assembled code at r4
- void SetUpTestState(TimingState *testState); // loads the starting register values for one run
-
- UInt32 MaskInterrupts(void); // returns the value to hand back to RestoreInterrupts()
- void RestoreInterrupts(UInt32 oldSR);
-
- static UInt32 sDataAddress; // address of the test data page; set by main(), read by SetUpTestState()
-
-
- /*------------------------------------------------------------------
- SetUpTestState [internal]
-
- Fills in the register values the test code starts from and seeds
- the data page at sDataAddress. main() calls it immediately before
- every ExecuteTest() pass.
- ------------------------------------------------------------------*/
-
- void
- SetUpTestState(TimingState *testState)
- {
-     GeneralPurposeRegisters *startGPR = &testState->gpr;
-     FloatingPointRegisters *startFPR = &testState->fpr;
-     UInt32 *dataWords = (UInt32 *)(sDataAddress);
-
-     // Starting condition register value
-     testState->machine.CR = 0x02000000;
-
-     // Integer registers: r3-r9 and r29 start at zero, r31 points at the data page
-     startGPR->R3 = 0;
-     startGPR->R4 = 0;
-     startGPR->R5 = 0;
-     startGPR->R6 = 0;
-     startGPR->R7 = 0;
-     startGPR->R8 = 0;
-     startGPR->R9 = 0;
-     startGPR->R29 = 0;
-     startGPR->R31 = sDataAddress;
-
-     // Data page contents
-     dataWords[5] = sDataAddress + 4; // value for r3 in: lwz r3,0x0014(r31)
-
-     // Floating point registers
-     startFPR->FPR15 = 10.0;
-     startFPR->FPR14 = 1.0;
- }
-
-
- /*------------------------------------------------------------------
- TestCode [internal]
-
- This is the code that will be tested for each run.
- kTestSize defines the length of the test in bytes (inst# * 4).
-
- main() does not call this routine: it copies the first 4, 8, ...
- kTestSize bytes of it into the harness buffer and runs that copy.
- Only the "#if 1" group below is assembled; with it enabled the
- routine is 11 test instructions followed by 10 "li" pads, so
- kTestSize (16 instructions) covers the test plus the first 5 pads.
- ------------------------------------------------------------------*/
-
- enum
- {
- kTestSize = (16L * 4L)
- };
-
- asm
- void
- TestCode(void)
- {
- #if 0 // various instructions
- add r3,r3,r4
- mtcrf 0x01,r3
- mtcrf 0xFF,r3
- mcrxr cr0
- mtlr r3
- mtctr r3
- cror 0,1,2
- add r3,r3,r3
- cror 0,4,8
- add r3,r3,r3
- #endif
-
- #if 0 // this shows how slow the overflow-setting (addo) instructions can be.
- lis r4,0x1000
- addo r4,r4,r4
- addo r4,r4,r4
- addo r4,r4,r4
- lis r4,0x1000
- addo r4,r4,r4
- addo r4,r4,r4
- addo r4,r4,r4
- #endif
-
- #if 0 // this is a somewhat slow set of store instructions.
- lis r4,0x10
- stw r4,0x40(r0)
- lis r4,0x10
- lis r5,0x10
- stw r6,0x44(r0)
- lis r4,0x10
- lis r5,0x10
- stw r6,0x48(r0)
- lis r4,0x10
- lis r5,0x10
- stw r6,0x48(r0)
- #endif
- #if 0 // this is an optimized version of those store instructions.
- lis r4,0x10
- stw r4,0x40(r0)
- lis r4,0x10
- stw r6,0x44(r0)
- lis r5,0x10
- stw r6,0x48(r0)
- lis r4,0x10
- stw r6,0x48(r0)
- lis r5,0x10
- lis r4,0x10
- lis r5,0x10
- #endif
-
- #if 1 // This shows how breathtakingly slow a load can be if you
- // recently stored to that address. This turns out to be
- // slow even if the load and store addresses differ by a
- // multiple of 4K. (at least on a G3)
- lis r4,0x10
- stw r4,0x40(r0)
- lis r4,0x10
- lwz r6,0x40(r0)
- lis r5,0x10
- stw r6,0x48(r0)
- lis r4,0x10
- lwz r6,0x1048(r0)
- lis r5,0x10
- lis r4,0x10
- lis r5,0x10
- #endif
-
- // These pads are here because if the copied test happens to include the
- // "blr" instruction at the end of this routine, the machine will crash
- // as you branch to no place in particular
- li r3,0
- li r3,0
- li r3,0
- li r3,0
- li r3,0
- li r3,0
- li r3,0
- li r3,0
- li r3,0
- li r3,0
- }
-
-
- /*------------------------------------------------------------------
- main
-
- This will setup the test environment and run the tests.
-
- For every prefix of TestCode (1 instruction, 2 instructions, ...
- up to kTestSize bytes) it assembles "harness prolog + test prefix
- + harness epilog" in a buffer, runs that three times to remove
- ICache and DCache issues, and prints the cycle and dispatch
- counts of the last run next to the disassembly of the last
- instruction in the prefix. A blank line is printed whenever the
- cycle count changes, and "Stall!" when it changes by other than 1.
-
- The code is page aligned and the test code is cache line aligned.
- The data page is carved out of its own allocation (dataPagePtr),
- so test data can never run past the end of the code buffer.
- ------------------------------------------------------------------*/
-
- void
- main(void)
- {
-     TimingState testState;
-     UInt32 SR;
-     Ptr codePagePtr;
-     UInt32 *codePage;
-     Ptr dataPagePtr;
-     UInt32 *dataPage;
-     UInt32 srcCodeAddress;
-     UInt32 codeAddress;
-     double idpc;
-     UInt32 index;
-     char mnemonic[256];
-     char operand[256];
-     char comment[256];
-     DisassemblerStatus status;
-     UInt32 lastCycle;
-
-     printf ("Welcome, you've got code.\n\n");
-
-     codePagePtr = NewPtrClear(4096L * 2); // Allocate space for the code
-     dataPagePtr = NewPtrClear(4096L * 4); // Allocate space for the data
-     if (codePagePtr == nil || dataPagePtr == nil)
-     {
-         // Without both buffers there is nothing safe to run.
-         printf("Could not allocate the code and data pages.\n");
-         if (codePagePtr != nil)
-             DisposePtr(codePagePtr);
-         if (dataPagePtr != nil)
-             DisposePtr(dataPagePtr);
-         return;
-     }
-
-     codePage = (UInt32 *)(((UInt32)(codePagePtr) + 0xFFF) & 0xFFFFF000); // align it to a page.
-
-     // Align within the DATA allocation and skip one page. At most 4095 bytes go to
-     // alignment, so the chosen page lies entirely inside the 16K block.
-     dataPage = (UInt32 *)((((UInt32)(dataPagePtr) + 0xFFF) & 0xFFFFF000) + 4096L);
-
-     sDataAddress = (UInt32)(dataPage);
-
-     lastCycle = 0;
-
-     printf("----Test Start----\n");
-
-     for (index = 4; index <= kTestSize; index = index + 4)
-     {
-         // The first word behind the function pointer is used as the address of the code itself.
-         srcCodeAddress = *(UInt32 *)(TestHarness); // Copy the prolog of the test into the buffer
-         codeAddress = (UInt32)codePage;
-
-         BlockMove((void *)(srcCodeAddress), (void *)(codeAddress), kHarnessPrologSize);
-         MakeDataExecutable((void *)(codeAddress), kHarnessPrologSize);
-
-         srcCodeAddress = *(UInt32 *)(TestCode); // Copy in the test code
-         codeAddress = codeAddress + kHarnessPrologSize;
-
-         BlockMove((void *)(srcCodeAddress), (void *)(codeAddress), index);
-         MakeDataExecutable((void *)(codeAddress), index);
-
-         // Start from empty strings so the report line is well defined even if the
-         // disassembler leaves one of them untouched.
-         mnemonic[0] = '\0';
-         operand[0] = '\0';
-         comment[0] = '\0';
-
-         status = ppcDisassembler( (UInt32 *)(codeAddress + index - 4), // Pointer to current instruction
-                                   0, // PC adjustment
-                                   kStandardDisAsmOptions, // Disassembly option flags
-                                   mnemonic, // Mnemonic string (to be filled in)
-                                   operand, // Operand string (to be filled in)
-                                   comment, // Comment string (to be filled in)
-                                   0, // User ref num (base of assembly)
-                                   nil); // Call-back function for symbol name look-ups
-
-         srcCodeAddress = *(UInt32 *)(TestHarness); // Copy in the epilog
-         srcCodeAddress = srcCodeAddress + kHarnessPrologSize;
-         codeAddress = codeAddress + index;
-
-         BlockMove((void *)(srcCodeAddress), (void *)(codeAddress), kHarnessEpilogSize);
-         MakeDataExecutable((void *)(codeAddress), kHarnessEpilogSize);
-
-         memset(&testState, 0, sizeof(testState)); // Clear the test state
-
-         testState.time.mmcr0_On = 0x42010044; // Setup the on value
-         testState.time.mmcr0_Off = 0xC2010044; // and the off value
-         testState.time.pmc1 = 0; // reset the counters
-         testState.time.pmc2 = 0;
-
-         SR = MaskInterrupts(); // Turn off interrupts
-
-         SetUpTestState(&testState); // Run the test three times
-         ExecuteTest(&testState, codePage);
-         SetUpTestState(&testState);
-         ExecuteTest(&testState, codePage);
-         SetUpTestState(&testState);
-         ExecuteTest(&testState, codePage);
-
-         RestoreInterrupts(SR); // Restore interrupts
-
-         testState.time.pmc1 = testState.time.pmc1 - 11; // Subtract off the overhead
-         testState.time.pmc2 = testState.time.pmc2 - 4;
-
-         if (testState.time.pmc1 != lastCycle)
-         {
-             printf("\n");
-             if ((testState.time.pmc1 - lastCycle) != 1)
-                 printf("Stall!\n\n");
-             lastCycle = testState.time.pmc1;
-         }
-         // The casts make the arguments match the %ld conversions whatever SInt32 is defined as.
-         printf("Cycles: %3ld, Count: %3ld %s %s %s\n", (long)testState.time.pmc1, (long)testState.time.pmc2, mnemonic, operand, comment);
-     }
-
-     if (kDUMP_END_STATE)
-     {
-         UInt32 count;
-         UInt32 *gpr;
-
-         printf("\nLR 0x%08lX\n", (unsigned long)testState.end_machine.LR);
-         printf("CTR 0x%08lX\n", (unsigned long)testState.end_machine.CTR);
-         printf("CR 0x%08lX\n", (unsigned long)testState.end_machine.CR);
-         printf("XER 0x%08lX\n\n", (unsigned long)testState.end_machine.XER);
-
-         gpr = &(testState.end_gpr.R0);
-         for (count = 0; count < 32; count++)
-             printf("r%lu 0x%08lX\n", (unsigned long)count, (unsigned long)gpr[count]);
-     }
-
-     printf("\n----Test End----\n");
-
-     idpc = (double)(testState.time.pmc2) / (double)(testState.time.pmc1); // Compute Inst. Dispatched per Cycle
-     printf("\nInstructions Dispatched per Cycle: %f\n", idpc);
-
-     DisposePtr(codePagePtr);
-     DisposePtr(dataPagePtr);
- }
-
-
- /*------------------------------------------------------------------
- ExecuteTest [internal]
-
- Glue code to jump to the TestHarness copy that main() assembled.
-
- The parameters are named after the registers the asm expects them
- in: r3 is the TimingState pointer (the harness starts by copying
- r3 to r10) and r4 is the address of the assembled code.
- ------------------------------------------------------------------*/
-
- asm
- void
- ExecuteTest(TimingState *r3, UInt32 *r4)
- {
- mtctr r4 // CTR = address of the code to run
- bctr // jump without linking; the harness restores the saved LR, so its final blr returns to our caller
- }
-
-
- /*------------------------------------------------------------------
- TestHarness [internal]
-
- This code contains the prolog and epilog that are added to the
- test code for each run. It saves the current register state,
- loads the user specified registers, and runs the test. It then
- saves the end register state and restores the registers to
- their initial values.
-
- It is used as a template, not called in place: main() copies the
- first kHarnessPrologSize bytes (everything before @beginInst),
- then the test code, then kHarnessEpilogSize bytes starting at
- @beginInst. Adding or removing an instruction therefore requires
- updating those two constants.
-
- Register use: r10 = pointer to the TimingState, r11 = MMCR0 "on"
- value, r12 = MMCR0 "off" value. The displacements in the loads and
- stores are the byte offsets of the TimingState members. SPR 952 is
- written with the MMCR0 values, SPR 953/954 are PMC1/PMC2, SPR 1
- is the XER.
-
- The numbers in the trailing comments are left over from earlier
- layouts (several repeat) and are not reliable byte offsets.
- ------------------------------------------------------------------*/
-
- asm
- void
- TestHarness(void)
- {
- mr r10, r3 // 0 // Put the pointer to our state in r10
- // Save the machine state
- mflr r3 // 4 // Save the LR
- stw r3, 416(r10) // 8
- mfctr r3 // 12 // Save the CTR
- stw r3, 4 + 416(r10) // 16
- mfcr r3 // 20 // Save the CR
- stw r3, 8 + 416(r10) // 24
- mfspr r3, 1 // 28 // Save the XER
- stw r3, 12 + 416(r10) // 32
-
- lwz r11, 0(r10) // 36 // Get the on command
- lwz r12, 4(r10) // 40 // Get the off command
-
- stw r10, 32 + 40(r10) // 44 // Save r10-r12 in the test state
- stw r11, 32 + 44(r10) // 48 // (GPR slots 10-12, so the lmw below reloads them unchanged)
- stw r12, 32 + 48(r10) // 52
-
- stmw r0, 432(r10) // 56 // Save the GPRs
-
- addi r3, r10, 552 // 76 // Save the FPU state (552 + 8 = 560, start of save_fpr)
- stfdu fp0, 8(r3) // 80
- stfdu fp1, 8(r3)
- stfdu fp2, 8(r3)
- stfdu fp3, 8(r3)
- stfdu fp4, 8(r3)
- stfdu fp5, 8(r3)
- stfdu fp6, 8(r3)
- stfdu fp7, 8(r3)
- stfdu fp8, 8(r3)
- stfdu fp9, 8(r3)
- stfdu fp10, 8(r3)
- stfdu fp11, 8(r3)
- stfdu fp12, 8(r3)
- stfdu fp13, 8(r3)
- stfdu fp14, 8(r3)
- stfdu fp15, 8(r3)
- stfdu fp16, 8(r3)
- stfdu fp17, 8(r3)
- stfdu fp18, 8(r3)
- stfdu fp19, 8(r3)
- stfdu fp20, 8(r3)
- stfdu fp21, 8(r3)
- stfdu fp22, 8(r3)
- stfdu fp23, 8(r3)
- stfdu fp24, 8(r3)
- stfdu fp25, 8(r3)
- stfdu fp26, 8(r3)
- stfdu fp27, 8(r3)
- stfdu fp28, 8(r3)
- stfdu fp29, 8(r3)
- stfdu fp30, 8(r3)
- stfdu fp31, 8(r3)
- nop // 208
- nop // 212
- nop // 216
-
- nop // 220
- nop // 224
- nop // 228
- nop // 234
-
- mtspr 952, r12 // 60 // Turn off the Performance Monitor
- li r3, 0 // 64 // Clear PMC1 and PMC2
- mtspr 953, r3 // 68
- mtspr 954, r3 // 72
-
- addi r3, r10, 152 // 76 // Load the FPU state (152 + 8 = 160, start of fpr)
- lfdu fp0, 8(r3) // 80
- lfdu fp1, 8(r3)
- lfdu fp2, 8(r3)
- lfdu fp3, 8(r3)
- lfdu fp4, 8(r3)
- lfdu fp5, 8(r3)
- lfdu fp6, 8(r3)
- lfdu fp7, 8(r3)
- lfdu fp8, 8(r3)
- lfdu fp9, 8(r3)
- lfdu fp10, 8(r3)
- lfdu fp11, 8(r3)
- lfdu fp12, 8(r3)
- lfdu fp13, 8(r3)
- lfdu fp14, 8(r3)
- lfdu fp15, 8(r3)
- lfdu fp16, 8(r3)
- lfdu fp17, 8(r3)
- lfdu fp18, 8(r3)
- lfdu fp19, 8(r3)
- lfdu fp20, 8(r3)
- lfdu fp21, 8(r3)
- lfdu fp22, 8(r3)
- lfdu fp23, 8(r3)
- lfdu fp24, 8(r3)
- lfdu fp25, 8(r3)
- lfdu fp26, 8(r3)
- lfdu fp27, 8(r3)
- lfdu fp28, 8(r3)
- lfdu fp29, 8(r3)
- lfdu fp30, 8(r3)
- lfdu fp31, 8(r3)
- nop // 208
- nop // 212
- nop // 216
-
- nop // 220
- nop // 224
- nop // 228
- nop // 234
-
- lwz r3, 0 + 16(r10) // 76 // Load the LR
- mtlr r3 // 80
- lwz r3, 4 + 16(r10) // 84 // Load the CTR
- mtctr r3 // 88
- lwz r3, 8 + 16(r10) // 92 // Load the CR
- mtcrf 0xFF, r3 // 96
- lwz r3, 12 + 16(r10) // 100 // Load the XER
- mtspr 1, r3 // 104 //
-
- mr r2, r10 // 108 // r2 serves as the base while r3-r31 (r10 included) are reloaded
- lmw r3, 12 + 32 (r2) // 112 // Load the GPRs
- lwz r0, 0 + 32 (r2) // 116
- nop // 120 // Don't load the Stack!
- lwz r2, 8 + 32 (r2) // 124 // r2 itself goes last because it is the base register
-
- nop // 140 // Some NOPs to make the first instruction on a cache line
- nop // 144
- cror 0,0,0 // 148 // dispatch synchronize
- sync // 128 // Synchronize the processor
- isync // 132 //
- mtspr 952, r11 // 136 // Start the counters
- b @beginInst // 152 // branch to the first instruction
- nop // 156 // Place holder
- @beginInst // 160 // prolog ends here; main() splices the test code in at this point
- /* Put test here. */
-
- /* End test here. */
- isync // Synchronize
-
- mtspr 952, r12 // Stop the counters
- stmw r0, 832(r10) // Save the end GPRs
-
- mfspr r4, 953 // Get PMC1 and PMC2
- mfspr r5, 954
- stw r4, 8(r10) // Save them in the timing state
- stw r5, 12(r10)
-
- mflr r3 // Save the LR
- stw r3, 816(r10)
- mfctr r3 // Save the CTR
- stw r3, 4 + 816(r10)
- mfcr r3 // Save the CR
- stw r3, 8 + 816(r10)
- mfspr r3, 1 // Save the XER
- stw r3, 12 + 816(r10)
-
- lwz r3, 0 + 416(r10) // Load the LR
- mtlr r3
- lwz r3, 4 + 416(r10) // Load the CTR
- mtctr r3
- lwz r3, 8 + 416(r10) // Load the CR
- mtcrf 0xFF, r3
- lwz r3, 12 + 416(r10) // Load the XER
- mtspr 1, r3
-
- addi r3, r10, 552 // Load the FPU state (the caller's values saved in save_fpr)
- lfdu fp0, 8(r3)
- lfdu fp1, 8(r3)
- lfdu fp2, 8(r3)
- lfdu fp3, 8(r3)
- lfdu fp4, 8(r3)
- lfdu fp5, 8(r3)
- lfdu fp6, 8(r3)
- lfdu fp7, 8(r3)
- lfdu fp8, 8(r3)
- lfdu fp9, 8(r3)
- lfdu fp10, 8(r3)
- lfdu fp11, 8(r3)
- lfdu fp12, 8(r3)
- lfdu fp13, 8(r3)
- lfdu fp14, 8(r3)
- lfdu fp15, 8(r3)
- lfdu fp16, 8(r3)
- lfdu fp17, 8(r3)
- lfdu fp18, 8(r3)
- lfdu fp19, 8(r3)
- lfdu fp20, 8(r3)
- lfdu fp21, 8(r3)
- lfdu fp22, 8(r3)
- lfdu fp23, 8(r3)
- lfdu fp24, 8(r3)
- lfdu fp25, 8(r3)
- lfdu fp26, 8(r3)
- lfdu fp27, 8(r3)
- lfdu fp28, 8(r3)
- lfdu fp29, 8(r3)
- lfdu fp30, 8(r3)
- lfdu fp31, 8(r3)
- nop
- nop
- nop
-
- nop
- nop
- nop
- nop // 160 bytes long
-
- mr r2, r10
- lmw r3, 12 + 432(r2) // Load the GPRs
- lwz r0, 0 + 432 (r2)
- nop // Don't load the stack
- lwz r2, 8 + 432 (r2)
-
- blr // return
- }
-
-
-
- /*------------------------------------------------------------------
- InterruptsCode [internal]
-
- This code is used to turn on and off 68K interrupts.
-
- Each array holds a short 68K routine as raw opcode words (the
- mnemonics are in the trailing comments). MaskInterrupts() and
- RestoreInterrupts() run them through CallUniversalProc() with the
- register-based ProcInfo values defined next to them.
- ------------------------------------------------------------------*/
-
- static UInt16 MaskInterruptsCode[] =
- {
- 0x40C0, // MOVE SR, D0 - hand the current status register back in D0
- 0x007C, 0x0700, // ORI.W #$0700, SR - set the bits in mask $0700
- 0x4E75 // RTS
- };
-
-
- /* ProcInfo record for MaskInterrupts call: no parameters, four-byte result returned in D0 */
- enum {
- uppMaskInterruptsProcInfo = kRegisterBased |
- RESULT_SIZE(kFourByteCode) |
- REGISTER_RESULT_LOCATION(kRegisterD0)
- };
-
- static UInt16 RestoreInterruptsCode[] =
- {
- 0x46C0, // MOVE D0, SR - reload the status register from D0
- 0x4E75 // RTS
- };
-
- /* ProcInfo record for RestoreInterrupts call: one four-byte parameter passed in D0, no result */
- enum {
- uppRestoreInterruptsProcInfo = kRegisterBased |
- REGISTER_ROUTINE_PARAMETER(1,kRegisterD0,kFourByteCode)
- };
-
-
-
- /* Runs the 68K routine in MaskInterruptsCode and returns the value it
-    leaves in D0, for a later call to RestoreInterrupts(). */
- UInt32
- MaskInterrupts(void)
- {
-     UniversalProcPtr maskRoutine = (UniversalProcPtr)(&MaskInterruptsCode);
-     UInt32 previousSR;
-
-     previousSR = CallUniversalProc(maskRoutine, uppMaskInterruptsProcInfo);
-     return previousSR;
- }
-
-
- /* Runs the 68K routine in RestoreInterruptsCode with oldSR passed in D0;
-    oldSR is the value an earlier MaskInterrupts() call returned. */
- void
- RestoreInterrupts(UInt32 oldSR)
- {
-     UniversalProcPtr restoreRoutine = (UniversalProcPtr)(&RestoreInterruptsCode);
-
-     CallUniversalProc(restoreRoutine, uppRestoreInterruptsProcInfo, oldSR);
- }
-
-
-